#coding:utf8

import time
import random
import requests
import re
from collect.clean import clean_data
import threading
import os

def jdpa(jdurl, timeout=30):
    """Download JD.com product comments for *jdurl* and pass them to ``clean_data``.

    The product page is fetched once to read its ``commentVersion`` token.
    Three background threads are then started, one per comment ``score``
    value: ``'3'`` -> ``pos.txt``, ``'2'`` -> ``zhong.txt``, ``'1'`` ->
    ``neg.txt`` (the file names suggest positive / neutral / negative
    comments -- confirm against the JD API). Each thread requests comment
    pages 0-9 in one shared random order, writes the concatenated raw
    response bodies to ``<data dir>/<file name>`` and then calls
    ``clean_data`` on that file.

    The data directory is ``data`` under the directory two levels above the
    current working directory; it is not created here.

    The function returns as soon as the threads have been started; it does
    not wait for them to finish and returns ``None``.

    Args:
        jdurl: Product page URL, e.g. ``https://item.jd.com/1069555.html``.
            The product id is the last path segment up to its first dot.
        timeout: Value passed as ``timeout`` to every ``requests.get`` call
            so a stalled connection cannot block forever.

    Raises:
        IndexError: If the product page contains no ``commentVersion:'...'``
            token.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
            'Accept':'*/*',
            'Accept-Language':'zh-CN,zh;q=0.8',
            'Connection':'keep-alive',
            'Referer':'https://item.jd.com/5181400.html'
    }
    # NOTE(review): hard-coded cookie values, presumably captured from a
    # browser session -- confirm they are still valid / still required.
    cookie = {'TrackID':'1QkzwnCO4RITuj5NvDmKyd71iHkLHyTiUz7m-Uwppc5XnghkvmYQK8UOHCv-bbVSbmQhr7eDVbik-2Gilty4T6Q',
            '__jda':'122270672.1705220178.1503567826.1508236649.1508239656.15',
            '__jdb':'122270672.1.1705220178',
            '__jdc':'122270672',
            '__jdu':'1705220178',
            '__jdv':'122270672|baidu-pinzhuan|t_288551095_baidupinzhuan|cpc|0f3d30c8dba7459bb52f2eb5eba8ac7d_0_ab73aaa3c5b24e83bb585dda2c6ce031|1508236649501',
            'areaId':'31',
            'cn':'2',
            'ipLoc-djd':'36685',
            'ipLocation':'%u65b0%u7586',
            'mx':'0_X',
            'rkv':'V0800',
            'user-key':'216123d5-4ed3-47b0-9289-12345',
            'xtest':'4657.553.d9798cdf31c02d86b8b81cc119d94836.b7a782741f667201b54880c925faec4b'
    }

    # The token found in the product page is appended to the callback name
    # of every comment request below.
    html_code = requests.get(jdurl, timeout=timeout)  # e.g. https://item.jd.com/1069555.html
    comment_versions = re.findall(r"commentVersion:'(.+?)',", html_code.text)
    if not comment_versions:
        # Same exception type the bare [0] lookup raised, but with a message.
        raise IndexError("commentVersion not found in page: " + str(jdurl))
    product_id = str(jdurl.split("/")[-1].split(".")[0])

    # The comment URL is <prefix><score><middle><page><suffix>.
    url_prefix = ('https://club.jd.com/comment/productPageComments.action'
                  '?callback=fetchJSON_comment98vv' + str(comment_versions[0])
                  + '&productId=' + product_id + '&score=')
    url_middle = '&sortType=5&page='
    url_suffix = '&pageSize=10&isShadowSku=0&fold=1'

    # os.path.join keeps the path valid on every platform (the separators
    # used to be hard-coded backslashes).
    data_dir = os.path.join(os.path.dirname(os.path.dirname(os.getcwd())), 'data')

    # Page numbers 0-9, each exactly once, in a random order shared by all
    # three worker threads.
    page_order = random.sample(range(10), 10)

    def rs(score, fname):
        """Fetch all pages for *score*, save them to *fname* and clean the file."""
        pages = []
        for page in page_order:
            url = url_prefix + score + url_middle + str(page) + url_suffix
            r = requests.get(url=url, headers=headers, cookies=cookie,
                             timeout=timeout)
            pages.append(r.content)
            # Random 0-4 second pause between requests.
            time.sleep(random.randrange(5))
            print("抓取的地址:", url, "\n", "状态:", r)

        path = os.path.join(data_dir, fname)
        # The context manager closes the file even if the write fails.
        with open(path, "wb") as out:
            out.write(b"".join(pages))
        clean_data(path)

    jobs = (('3', 'pos.txt'), ('2', 'zhong.txt'), ('1', 'neg.txt'))
    workers = [threading.Thread(target=rs, args=job) for job in jobs]
    # Fire and forget: the threads are deliberately not joined, so the
    # caller is not blocked while the pages download.
    for worker in workers:
        worker.start()

